package org.anyline.simple.spider;

import org.anyline.entity.DataRow;
import org.anyline.entity.DataSet;
import org.anyline.net.HttpUtil;
import org.anyline.util.regular.RegularUtil;

import java.util.HashMap;
import java.util.Map;

/**
 * Demo spider for ke.qq.com course pages.
 *
 * <p>Given the browser address of a course playback page, it reads the course catalog embedded in
 * the page (the {@code __NEXT_DATA__} JSON) and, for every task (video) of the selected term,
 * calls the {@code describe_rec_video} endpoint and prints the response together with the task
 * name and taid.
 */
public class QQSpider {
    /** Endpoint queried once per task to obtain the description of the recorded video. */
    private static final String DESCRIBE_VIDEO_ENDPOINT = "https://ke.qq.com/cgi-proxy/rec_video/describe_rec_video";

    /**
     * Value of the URL-encoded JSON {@code header} query parameter, copied verbatim from a captured
     * browser request.
     * NOTE(review): it embeds the uin of the captured session; presumably it has to match the
     * logged-in account - confirm.
     */
    private static final String CAPTURED_HEADER_PARAM = "%7B%22uin%22%3A%221909366099%22%2C%22srv_appid%22%3A201%2C%22cli_appid%22%3A%22ke%22%2C%22cli_info%22%3A%7B%22cli_platform%22%3A3%7D%7D";

    public static void main(String[] args) {
        // Open a course playback page in the browser and copy its address here.
        download("https://ke.qq.com/course/2737483/9558853446911307#term_id=102844777");
    }

    /**
     * Walks the catalog of the term referenced by {@code url} and prints, for every task, the
     * response of the describe_rec_video endpoint followed by the task name and taid.
     *
     * @param url browser address of the playback page; it must contain {@code /course/{id}/} and
     *            {@code term_id=}
     * @throws IllegalArgumentException if the course id or the term id cannot be cut out of {@code url}
     * @throws IllegalStateException if the page does not contain a usable course catalog
     */
    public static void download(String url){
        String course = RegularUtil.cut(url, "course", "/", "/");
        String term = RegularUtil.cut(url, "term_id=", RegularUtil.TAG_END);
        if(isBlank(course) || isBlank(term)){
            throw new IllegalArgumentException("url must contain /course/{id}/ and term_id=: " + url);
        }
        // Read the page source.
        String page = HttpUtil.get(url).getText();
        // The course list (the catalog shown on the right of the page) is embedded as JSON in the
        // __NEXT_DATA__ script element; one course normally holds several videos.
        String json_txt = RegularUtil.cut(page, "__NEXT_DATA__", ">", "</script>");
        if(isBlank(json_txt)){
            throw new IllegalStateException("__NEXT_DATA__ json not found in page: " + url);
        }
        DataRow json = DataRow.parseJson(json_txt);
        if(json == null){
            throw new IllegalStateException("__NEXT_DATA__ json could not be parsed: " + url);
        }
        // catalogMap is keyed by term id (see the sample at the end of this class).
        DataRow catalogMap = (DataRow)json.recursion("props","pageProps","courseInfo","catalogMap");
        if(catalogMap == null){
            throw new IllegalStateException("props.pageProps.courseInfo.catalogMap not found in page: " + url);
        }
        DataSet items = catalogMap.getSet(term);
        if(items == null){
            throw new IllegalStateException("no catalog for term " + term + " in page: " + url);
        }
        // The request headers are the same for every task, so build them once.
        Map<String, String> header = buildHeaders(url);
        // The two outer levels usually hold a single element each.
        for(DataRow item:items){
            DataSet subs = item.getSet("sub_info");
            if(subs == null){
                continue;
            }
            for(DataRow sub:subs){
                // Video list of this section.
                DataSet tasks = sub.getSet("task_info");
                if(tasks == null){
                    continue;
                }
                for(DataRow task:tasks){
                    String name = task.getString("name"); // chapter title
                    String taid = task.getString("taid");
                    // In the captured request the file_id equals the sample task's resid_list, not
                    // its taid; fall back to taid (the previous behavior) when resid_list is absent.
                    // NOTE(review): resid_list may hold more than one id for other courses - confirm.
                    String fileId = task.getString("resid_list");
                    if(isBlank(fileId)){
                        fileId = taid;
                    }
                    String video_url = buildVideoUrl(course, fileId, term);
                    // Request the video description (not the page url again); the server checks
                    // the request headers on this call.
                    String response = HttpUtil.get(header, video_url).getText();
                    System.out.println(response);
                    System.out.println(name);
                    System.out.println(taid);
                }
            }
        }
    }

    /**
     * Builds the describe_rec_video address for one task.
     * NOTE(review): bkn and r are copied from a captured request; bkn is presumably derived from
     * the login session - confirm before relying on it for another account.
     */
    private static String buildVideoUrl(String course, String fileId, String term){
        return DESCRIBE_VIDEO_ENDPOINT
                + "?course_id=" + course
                + "&file_id=" + fileId
                + "&header=" + CAPTURED_HEADER_PARAM
                + "&term_id=" + term
                + "&vod_type=0&bkn=667121205&r=0.3613";
    }

    /**
     * Builds the request headers modeled on the captured browser request shown below.
     * The login cookie of that request is intentionally not hard-coded here.
     * NOTE(review): the endpoint presumably needs a valid cookie header to answer - confirm and
     * supply it from configuration rather than from source.
     */
    private static Map<String, String> buildHeaders(String pageUrl){
        // A Referer never carries the fragment part of the address.
        int fragment = pageUrl.indexOf('#');
        String referer = fragment >= 0 ? pageUrl.substring(0, fragment) : pageUrl;

        Map<String, String> header = new HashMap<>();
        header.put("accept","*/*");
        header.put("accept-language","zh-CN,zh;q=0.9");
        header.put("priority","u=1, i");
        header.put("referer",referer);
        header.put("sec-ch-ua","\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"");
        header.put("sec-ch-ua-mobile","?0");
        header.put("sec-ch-ua-platform","\"Windows\"");
        header.put("sec-fetch-dest","empty");
        header.put("sec-fetch-mode","cors");
        header.put("sec-fetch-site","same-origin");
        header.put("user-agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36");
        header.put("x-request-id","f966d9ca-b45d-32e8-2c45-5c78d82d8c99");
        return header;
    }

    /** Returns true when the value is null, empty or whitespace only. */
    private static boolean isBlank(String value){
        return value == null || value.trim().isEmpty();
    }

    // Captured browser request for one video (the cookie value is redacted on purpose):
    //
    // curl 'https://ke.qq.com/cgi-proxy/rec_video/describe_rec_video?course_id=2737483&file_id=5285890803997328557&header=%7B%22uin%22%3A%221909366099%22%2C%22srv_appid%22%3A201%2C%22cli_appid%22%3A%22ke%22%2C%22cli_info%22%3A%7B%22cli_platform%22%3A3%7D%7D&term_id=102844777&vod_type=0&bkn=667121205&r=0.3613' \
    //   -H 'accept: */*' \
    //   -H 'accept-language: zh-CN,zh;q=0.9' \
    //   -H 'cookie: <login cookies of the captured session - redacted>' \
    //   -H 'priority: u=1, i' \
    //   -H 'referer: https://ke.qq.com/course/2737483/9558853446911307' \
    //   -H 'sec-ch-ua: "Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"' \
    //   -H 'sec-ch-ua-mobile: ?0' \
    //   -H 'sec-ch-ua-platform: "Windows"' \
    //   -H 'sec-fetch-dest: empty' \
    //   -H 'sec-fetch-mode: cors' \
    //   -H 'sec-fetch-site: same-origin' \
    //   -H 'user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36' \
    //   -H 'x-request-id: f966d9ca-b45d-32e8-2c45-5c78d82d8c99'
    //
    // Sample of the __NEXT_DATA__ json read by download():
    //
    // {
    //     "props": {
    //         "pageProps": {
    //             "courseInfo": {
    //                 "status": 2,
    //                 "err": null,
    //                 "data": Object{...},
    //                 "curTermId": -1,
    //                 "curTerm": Object{...},
    //                 "catalogStatus": 2,
    //                 "catalogErr": null,
    //                 "catalogMap": {
    //                     "102844777": [
    //                         {
    //                             "ch_id": 2967813,
    //                             "introduce": "",
    //                             "ch_no": 0,
    //                             "name": "",
    //                             "sub_info": [
    //                                 {
    //                                     "csid": 16252645,
    //                                     "sub_id": 0,
    //                                     "introduce": "",
    //                                     "name": "Netty 8小时 快速入门",
    //                                     "endtime": 0,
    //                                     "term_id": 102844777,
    //                                     "task_info": [
    //                                         {
    //                                             "restrict_flag": 0,
    //                                             "create_time": 1592274127,
    //                                             "csid": 16252645,
    //                                             "introduce": "",
    //                                             "special_flag": 0,
    //                                             "endtime": 0,
    //                                             "resid_ext": "{\u0026quot;times\u0026quot;:1237,\u0026quot;txcloud\u0026quot;:1,\u0026quot;vid\u0026quot;:\u0026quot;\u0026quot;}",
    //                                             "term_id": 102844777,
    //                                             "type": 2,
    //                                             "bgtime": 0,
    //                                             "expr_flag": 0,
    //                                             "te_list": [
    //                                                 3253161180
    //                                             ],
    //                                             "name": "BIO/NIO/AIO三种IO模式概述",
    //                                             "task_bit_flag": 0,
    //                                             "resid_list": "5285890803997328557",
    //                                             "tu_list": [
    //                                                 3253161180
    //                                             ],
    //                                             "expr_range": 0,
    //                                             "append_flag": 0,
    //                                             "aid": 121675,
    //                                             "taid": "9558853446911307",
    //                                             "cid": 2737483
    //                                         },
    //                                         Object{...},
    //                                         Object{...},
    //
    // The list of ts files is obtained from:
    // https://ke.qq.com/cgi-proxy/rec_video/describe_rec_video?course_id=2737483&file_id=5285890803997328557&header=%7B%22uin%22%3A%221909366099%22%2C%22srv_appid%22%3A201%2C%22cli_appid%22%3A%22ke%22%2C%22cli_info%22%3A%7B%22cli_platform%22%3A3%7D%7D&term_id=102844777&vod_type=0&bkn=667121205&r=0.3613
}
